Presentation 5: Graphs
RThere are three main options for doing graphics in R:
graphics library (preinstalled).ggplot2 library, which is more flexible but harder to use.lattice library, which I am unfamiliar with.We focus on base graphics.
Change the color of the elements to cyan.
Remove all text from the plot, leaving only the points.
Make a scatterplot, with the same data, using triangles. To do this, find the pch code for a triangle (google, e.g., pch r there are multiple!) and make a plot.
Use rownames(mtcars) to color your three favorite cars in blue.
My favorite cars are Hornet 4 Drive and Hornet Sportabout. They have indices 4 and 5.
Change the point style of the red elements to a filled diamond.
colors <- rep("black", length(mtcars$mpg))
colors[c(14, 28)] <- "red"
cexes <- rep(1.6, length(mtcars$mpg))
cexes[c(14, 28)] <- 4
pchs <- rep(20, length(mtcars$mpg))
pchs[c(14,28)] <- 18
plot(x = mtcars$mpg,
y = mtcars$disp,
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = colors,
pch = pchs,
cex = cexes)colors <- rep("black", length(mtcars$mpg))
colors[c(14, 28)] <- "red"
cexes <- rep(1.6, length(mtcars$mpg))
cexes[c(14, 28)] <- 4
pchs <- rep(20, length(mtcars$mpg))
pchs[c(14,28)] <- 18
plot(x = mtcars$mpg,
y = mtcars$disp,
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = colors,
pch = pchs,
cex = cexes)
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)colors <- rep("black", length(mtcars$mpg))
colors[c(14, 28)] <- "red"
cexes <- rep(1.6, length(mtcars$mpg))
cexes[c(14, 28)] <- 4
pchs <- rep(20, length(mtcars$mpg))
pchs[c(14,28)] <- 18
plot(x = mtcars$mpg,
y = mtcars$disp,
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = colors,
pch = pchs,
cex = cexes)
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)xlim and ylim::: {.panel-tabset}
plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information",
col = "red",
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)))
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)Transparency can be added using the rbg function. The final argument is the level of transparency.
plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = rgb(1, 0, 0, 0.6),
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)))
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)Make the red points even more transparent. Add the black points to the plot.
plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = rgb(1, 0, 0, 0.9),
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)))
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)plot(x = mtcars$mpg,
y = mtcars$disp,
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = "black",
cex = 1.6,
pch = 20,
bty = "l")
grid()The grid line at approximately (15, 300) doesn’t look good, since it’s plotted above the points. Use the
pointfunction to fix the problem.
ablineplot(x = mtcars$mpg,
y = mtcars$disp,
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = "black",
cex = 1.6,
pch = 20,
bty = "l",
xlim = c(0, 40))
grid()
points(x = mtcars$mpg,
y = mtcars$disp,
col = "black",
cex = 1.6,
pch = 20)plot(x = mtcars$mpg,
y = mtcars$disp,
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = "black",
cex = 1.6,
pch = 20,
bty = "l",
xlim = c(0, 40))
grid()
points(x = mtcars$mpg,
y = mtcars$disp,
col = "black",
cex = 1.6,
pch = 20)
abline(v = 10, lty = 1, lwd = 2, col = "red")plot(x = mtcars$mpg,
y = mtcars$disp,
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = "black",
cex = 1.6,
pch = 20,
bty = "l",
ylim = c(0, 500),
xlim = c(0, 40))
grid()
points(x = mtcars$mpg,
y = mtcars$disp,
col = "black",
cex = 1.6,
pch = 20)
abline(v = 10, lty = 1, lwd = 2, col = "red")
abline(h = 50, lty = 1, lwd = 2, col = "blue")Inspect the documentation of
ablineand add a purple line with slope1000and intercept-50.
plot(x = mtcars$mpg,
y = mtcars$disp,
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = "black",
cex = 1.6,
pch = 20,
bty = "l",
ylim = c(0, 500),
xlim = c(0, 40))
grid()
points(x = mtcars$mpg,
y = mtcars$disp,
col = "black",
cex = 1.6,
pch = 20)
abline(v = 10, lty = 1, lwd = 2, col = "red")
abline(h = 50, lty = 1, lwd = 2, col = "blue")
abline(a = 1000, b = -50, lwd = 2, col = "purple")The text function adds labels to a plot.
plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = rgb(1, 0, 0, 0.6),
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)))
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)
text(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
labels = rownames(mtcars)[c(14, 28)])The labels overlapp the points. We must fix this manually.
plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = rgb(1, 0, 0, 0.6),
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)),
bty = "l")
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)
text(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
labels = rownames(mtcars)[c(14, 28)],
pos = c(1, 3),
offset = c(1,1))The loess function estimates a scatterplot smoother.
Let’s use a line plot.
We must sort the miles per gallons to get a good plot.
plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = rgb(1, 0, 0, 0.6),
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)),
bty = "l")
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)
text(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
labels = rownames(mtcars)[c(14, 28)],
pos = c(1, 3),
offset = c(1,1))
x <- sort(mtcars$mpg)
lines(x, predict(model, x), lwd = 2, lty = 3, col = "grey")The scatterplot smoother is plotted above the data points, crearting an ugly effec (see mpg ~ 30). Modify the code so the points are plotted on top of the smoother.
model <- loess(disp ~ mpg, data = mtcars)
plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = rgb(1, 0, 0, 0.6),
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)),
bty = "l")
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)
text(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
labels = rownames(mtcars)[c(14, 28)],
pos = c(1, 3),
offset = c(1,1))
x <- sort(mtcars$mpg)
lines(x, predict(model, x), lwd = 2, lty = 3, col = "grey")
legend("topright", legend = c("Scatterplot smoother", "Unimportant cars", "Important cars"), col = c("grey", "black", "red"))plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
col = rgb(1, 0, 0, 0.6),
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)),
bty = "l")
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)
text(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
labels = rownames(mtcars)[c(14, 28)],
pos = c(1, 3),
offset = c(1,1))
x <- sort(mtcars$mpg)
lines(x, predict(model, x), lwd = 2, lty = 3, col = "grey")
legend("topright",
legend = c("Scatterplot smoother", "Unimportant cars", "Important cars"),
col = c("grey", "black", "red"),
lwd = c(2, NA, NA),
pch = c(NA, 20, 16),
lty = c(3, NA, NA))plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
sub = "One gallon equals ~3.8 litres",
col = rgb(1, 0, 0, 0.6),
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)),
bty = "l")
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)
text(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
labels = rownames(mtcars)[c(14, 28)],
pos = c(1, 3),
offset = c(1,1))
x <- sort(mtcars$mpg)
lines(x, predict(model, x), lwd = 2, lty = 3, col = "grey")
legend("topright",
legend = c("Scatterplot smoother", "Unimportant cars", "Important cars"),
col = c("grey", "black", rgb(1, 0, 0, 0.6)),
lwd = c(2, NA, NA),
pch = c(NA, 20, 18),
lty = c(3, NA, NA),
pt.cex = c(0, 1, 4/1.6),
bty = "n")Modify the code so that the “Scatterplot smoother” appears at the bottom of the legend, not the top.
pdf(file = "cars.pdf", width = 8, height = 8)
plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
main = "Important information concerning automobiles",
sub = "One gallon equals ~3.8 litres",
col = rgb(1, 0, 0, 0.6),
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)),
bty = "l")
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)
text(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
labels = rownames(mtcars)[c(14, 28)],
pos = c(1, 3),
offset = c(1,1))
x <- sort(mtcars$mpg)
lines(x, predict(model, x), lwd = 2, lty = 3, col = "grey")
legend("topright",
legend = c("Scatterplot smoother", "Unimportant cars", "Important cars"),
col = c("grey", "black", rgb(1, 0, 0, 0.6)),
lwd = c(2, NA, NA),
pch = c(NA, 20, 18),
lty = c(3, NA, NA),
pt.cex = c(0, 1, 4/1.6),
bty = "n")
dev.off()png
2
pdf(file = "cars.pdf", width = 8, height = 8)
plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
sub = "One gallon equals ~3.8 litres",
col = rgb(1, 0, 0, 0.6),
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)),
cex.lab = 1.5,
cex.main = 2,
bty = "l")
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)
text(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
labels = rownames(mtcars)[c(14, 28)],
pos = c(1, 3),
offset = c(1,1))
x <- sort(mtcars$mpg)
lines(x, predict(model, x), lwd = 3, lty = 3, col = "grey")
legend("topright",
legend = c("Scatterplot smoother", "Unimportant cars", "Important cars"),
col = c("grey", "black", rgb(1, 0, 0, 0.6)),
lwd = c(3, NA, NA),
pch = c(NA, 20, 18),
lty = c(3, NA, NA),
pt.cex = c(0, 1, 4/1.6),
cex = c(1.3,1.3,1.3),
bty = "n")
dev.off()png
2
Some journals do not accept pdf graphs.
BMP, JPEG, PNG and TIFF. Avoid these if possible, as they are lossy.eps is a PostScript format that is often accepted.svg is probably the preferred format.svg("cars.svg", width = 8, height = 8)
plot(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
xlab = "Miles per gallon",
ylab = "Displacement",
sub = "One gallon equals ~3.8 litres",
col = rgb(1, 0, 0, 0.6),
pch = 18,
cex = 4,
xlim = c(min(mtcars$mpg), max(mtcars$mpg)),
ylim = c(min(mtcars$disp), max(mtcars$disp)),
cex.lab = 1.5,
cex.main = 2,
bty = "l")
points(x = mtcars$mpg[-c(14, 28)],
y = mtcars$disp[-c(14, 28)],
pch = 20,
col = "black",
cex = 1.6)
text(x = mtcars$mpg[c(14, 28)],
y = mtcars$disp[c(14, 28)],
labels = rownames(mtcars)[c(14, 28)],
pos = c(1, 3),
offset = c(1,1))
x <- sort(mtcars$mpg)
lines(x, predict(model, x), lwd = 3, lty = 3, col = "grey")
legend("topright",
legend = c("Scatterplot smoother", "Unimportant cars", "Important cars"),
col = c("grey", "black", rgb(1, 0, 0, 0.6)),
lwd = c(3, NA, NA),
pch = c(NA, 20, 18),
lty = c(3, NA, NA),
pt.cex = c(0, 1, 4/1.6),
cex = c(1.3,1.3,1.3),
bty = "n")
dev.off()png
2
In Norway we often use liter per 10 kilometers (Norwegian mile). Make a variable
lptthat contains the liter per 10 kilometers. Use the conversion factorgallon = 3.78531.
xlabylabmainlinespointsaddcolortypeltypchChange the color of
hpto green and displacement to yellow.
boxplot(mtcars$hp, mtcars$disp, names = c("Horsepower", "Displacement"), col = c("green", "yellow"))Add an informative title to the plot. Change the colors back to something pleasant.
boxplot(mtcars$hp, mtcars$disp, names = c("Horsepower", "Displacement"), col = c("white", "white"), main = "Distribution of car properties")Make a boxplot of
wt,drat, andcyl. Add labels and title.
Change the color of the histogram to white. Make the histogram a density histogram, i.e., the integral should integrate to 1. (By default, it shows counts.)
hist(mtcars$mpg, xlab = "Miles per gallon", main = "Histogram of miles per gallon", color = "white", freq = FALSE)The default value for
breaks(the number of bins + 1) is too small by default, asRuses an outdated method for calculating the breaks. Experiment with differentbreaksand choose a “pleasing” one.
hist(mtcars$mpg, xlab = "Miles per gallon", main = "Histogram of miles per gallon", color = "white", freq = FALSE, breaks = 10)Add a call to
rugonmtcars$mpgbelow the histogram.
hist(mtcars$mpg, xlab = "Miles per gallon", main = "Histogram of miles per gallon", col = "white", freq = FALSE, breaks = 10)
rug(mtcars$mpg)We are sometimes interested in evaluating wether a dataset is approximately normal. We can do this visually (but unreliably!) by plotting a normal density curve on top of a histogram.
mu <- mean(mtcars$mpg)
sigma <- sd(mtcars$mpg)
x <- seq(min(mtcars$mpg), max(mtcars$mpg), length.out = 100)
x [1] 10.40000 10.63737 10.87475 11.11212 11.34949 11.58687 11.82424 12.06162
[9] 12.29899 12.53636 12.77374 13.01111 13.24848 13.48586 13.72323 13.96061
[17] 14.19798 14.43535 14.67273 14.91010 15.14747 15.38485 15.62222 15.85960
[25] 16.09697 16.33434 16.57172 16.80909 17.04646 17.28384 17.52121 17.75859
[33] 17.99596 18.23333 18.47071 18.70808 18.94545 19.18283 19.42020 19.65758
[41] 19.89495 20.13232 20.36970 20.60707 20.84444 21.08182 21.31919 21.55657
[49] 21.79394 22.03131 22.26869 22.50606 22.74343 22.98081 23.21818 23.45556
[57] 23.69293 23.93030 24.16768 24.40505 24.64242 24.87980 25.11717 25.35455
[65] 25.59192 25.82929 26.06667 26.30404 26.54141 26.77879 27.01616 27.25354
[73] 27.49091 27.72828 27.96566 28.20303 28.44040 28.67778 28.91515 29.15253
[81] 29.38990 29.62727 29.86465 30.10202 30.33939 30.57677 30.81414 31.05152
[89] 31.28889 31.52626 31.76364 32.00101 32.23838 32.47576 32.71313 32.95051
[97] 33.18788 33.42525 33.66263 33.90000
Using
lines, add the normal density plot to the histogram. Be sure to choose a nice color and line type (lty)!
hist(mtcars$mpg, xlab = "Miles per gallon", main = "Histogram of miles per gallon", col = "white", freq = FALSE, breaks = 10)
rug(mtcars$mpg)
lines(x, dnorm(x, mu, sigma), lty = 3, col = "black", lwd = 2)The Shapiro–Wilk test is among the most widely used tests of normality. Find the Shapiro–Wilk test in
Rand evaluate the normality ofmtcars$mpgformally.
We can’t formally reject normality. But it’s not very plausible in the first place.
main is a string now. Remove the title from the plot. (ii) Change the “box” around the plot so it has lines emanating only from the lower-left corner.par(mfrow = c(2,2))
hist(mtcars$mpg, xlab = "Miles per gallon", col = "white", main = NULL, freq = FALSE, breaks = 10)
rug(mtcars$mpg)
lines(x, dnorm(x, mu, sigma), lty = 3, col = "black", lwd = 2)
qqnorm(mtcars$mpg, main = NULL, bty = "l")
qqline(mtcars$mpg)
boxplot(mtcars$mpg, xlab = "Miles per gallon")
plot(mtcars$mpg, mtcars$disp, xlab = "Miles per gallon", ylab = "Displacement", pch = 20)plot is genericThe plot function is generic. It can be applied to many different objects, with sometimes unpredictable behaviour!
property_type buyer_type
1 flat single male
2 flat single male
3 flat single male
4 flat single male
5 flat single male
6 flat single male
single male single female married couple family
flat 40 30 16 10
bungalow 4 4 14 16
detached house 8 16 26 42
terrace 16 7 45 39
To find the documentation of a generic, you must know the class of the object you use.
Instances of generic functions can be found by placing a . behind the generic method:
Call
plot(dnorm, -5, 5). What does the plot function do now? Find its documentation.